In [1]:
#-----------------------------------------------------------------------
# Juan David Correa www.astropema.com March 2025
#----------------------------------------------------------------------


import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Load the datasets
book1 = pd.read_csv("raw_data_books/book1.csv")
book2 = pd.read_csv("raw_data_books/book2.csv")
book3 = pd.read_csv("raw_data_books/book3.csv")
book4 = pd.read_csv("raw_data_books/book4.csv")
book5 = pd.read_csv("raw_data_books/book5.csv")

# Rename columns to match the expected names
book1 = book1.rename(columns={'Person 1': 'Source', 'Person 2': 'Target', 'weight': 'Weight'})
book2 = book2.rename(columns={'Person 1': 'Source', 'Person 2': 'Target', 'weight': 'Weight'})
book3 = book3.rename(columns={'Person 1': 'Source', 'Person 2': 'Target', 'weight': 'Weight'})
book4 = book4.rename(columns={'Person 1': 'Source', 'Person 2': 'Target', 'weight': 'Weight'})
book5 = book5.rename(columns={'Person 1': 'Source', 'Person 2': 'Target', 'weight': 'Weight'})

# Combine the datasets
data = pd.concat([book1, book2, book3, book4, book5])

# Display the first few rows of the combined dataset
print(data.head())

# Create a graph object
G = nx.Graph()

# Add edges to the graph
for index, row in data.iterrows():
    G.add_edge(row['Source'], row['Target'], weight=row['Weight'])

# Use a layout algorithm for better positioning
pos = nx.spring_layout(G, k=0.15, iterations=20)

# Visualize the network and save as an image file
plt.figure(figsize=(50, 50))
nx.draw(G, pos, with_labels=True, node_size=300, font_size=10, font_weight='bold', edge_color='gray')
plt.savefig("game_of_thrones_network.png", format="PNG", dpi=300)
plt.show()

# Print basic information about the graph
print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

# Print the first few nodes and edges
print("Nodes:", list(G.nodes)[:10])
print("Edges:", list(G.edges(data=True))[:10])

# Degree centrality
degree_centrality = nx.degree_centrality(G)
print("Degree Centrality:", sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:10])

# Betweenness centrality
betweenness_centrality = nx.betweenness_centrality(G)
print("Betweenness Centrality:", sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[:10])

# Closeness centrality
closeness_centrality = nx.closeness_centrality(G)
print("Closeness Centrality:", sorted(closeness_centrality.items(), key=lambda x: x[1], reverse=True)[:10])
                            Source              Target        Type  Weight  \
0                   Addam-Marbrand     Jaime-Lannister  Undirected       3   
1                   Addam-Marbrand     Tywin-Lannister  Undirected       6   
2                Aegon-I-Targaryen  Daenerys-Targaryen  Undirected       5   
3                Aegon-I-Targaryen        Eddard-Stark  Undirected       4   
4  Aemon-Targaryen-(Maester-Aemon)      Alliser-Thorne  Undirected       4   

   book  
0   1.0  
1   1.0  
2   1.0  
3   1.0  
4   1.0  
No description has been provided for this image
Graph has 796 nodes and 2823 edges
Nodes: ['Addam-Marbrand', 'Jaime-Lannister', 'Tywin-Lannister', 'Aegon-I-Targaryen', 'Daenerys-Targaryen', 'Eddard-Stark', 'Aemon-Targaryen-(Maester-Aemon)', 'Alliser-Thorne', 'Bowen-Marsh', 'Chett']
Edges: [('Addam-Marbrand', 'Jaime-Lannister', {'weight': 8}), ('Addam-Marbrand', 'Tywin-Lannister', {'weight': 6}), ('Addam-Marbrand', 'Gyles-Rosby', {'weight': 3}), ('Addam-Marbrand', 'Jalabhar-Xho', {'weight': 3}), ('Addam-Marbrand', 'Joffrey-Baratheon', {'weight': 3}), ('Addam-Marbrand', 'Kevan-Lannister', {'weight': 3}), ('Addam-Marbrand', 'Oberyn-Martell', {'weight': 3}), ('Addam-Marbrand', 'Tyrion-Lannister', {'weight': 7}), ('Addam-Marbrand', 'Varys', {'weight': 4}), ('Addam-Marbrand', 'Brynden-Tully', {'weight': 3})]
Degree Centrality: [('Tyrion-Lannister', 0.15345911949685534), ('Jon-Snow', 0.14339622641509434), ('Jaime-Lannister', 0.1270440251572327), ('Cersei-Lannister', 0.1220125786163522), ('Stannis-Baratheon', 0.11194968553459118), ('Arya-Stark', 0.10566037735849056), ('Catelyn-Stark', 0.09433962264150943), ('Sansa-Stark', 0.09433962264150943), ('Eddard-Stark', 0.0930817610062893), ('Robb-Stark', 0.0930817610062893)]
Betweenness Centrality: [('Jon-Snow', 0.19211961968354493), ('Tyrion-Lannister', 0.16219109611159815), ('Daenerys-Targaryen', 0.11841801916269228), ('Theon-Greyjoy', 0.11128331813470259), ('Stannis-Baratheon', 0.11013955266679568), ('Jaime-Lannister', 0.10083800602581651), ('Cersei-Lannister', 0.0887042665586028), ('Arya-Stark', 0.08724236063571156), ('Eddard-Stark', 0.07873239257251853), ('Robert-Baratheon', 0.07822769060383253)]
Closeness Centrality: [('Tyrion-Lannister', 0.4763331336129419), ('Robert-Baratheon', 0.4592720970537262), ('Eddard-Stark', 0.455848623853211), ('Cersei-Lannister', 0.45454545454545453), ('Jaime-Lannister', 0.4519613416714042), ('Jon-Snow', 0.44537815126050423), ('Stannis-Baratheon', 0.4446308724832215), ('Robb-Stark', 0.4441340782122905), ('Joffrey-Baratheon', 0.4339519650655022), ('Catelyn-Stark', 0.4334787350054526)]
In [2]:
# Game of Thrones Network Analysis

## Introduction
'''

Network analysis is a powerful tool for understanding the relationships and interactions within a dataset. 
In this project, we analyze the network of characters from the Game of Thrones series to uncover insights 
about their interactions and importance within the story.

### Objectives
- Construct a network graph from the Game of Thrones dataset.
- Perform basic network analysis to understand the structure of the network.
- Calculate centrality measures to identify important characters.
- Perform community detection to identify groups of closely connected characters.

'''

## Data Loading and Preprocessing
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from networkx.algorithms.community import greedy_modularity_communities

# Load the datasets
book1 = pd.read_csv("raw_data_books/book1.csv")
book2 = pd.read_csv("raw_data_books/book2.csv")
book3 = pd.read_csv("raw_data_books/book3.csv")
book4 = pd.read_csv("raw_data_books/book4.csv")
book5 = pd.read_csv("raw_data_books/book5.csv")

# Combine the datasets
data = pd.concat([book1, book2, book3, book4, book5])

# Display the first few rows of the combined dataset
data.head()
Out[2]:
Person 1 Person 2 Type weight book
0 Addam-Marbrand Jaime-Lannister Undirected 3 1.0
1 Addam-Marbrand Tywin-Lannister Undirected 6 1.0
2 Aegon-I-Targaryen Daenerys-Targaryen Undirected 5 1.0
3 Aegon-I-Targaryen Eddard-Stark Undirected 4 1.0
4 Aemon-Targaryen-(Maester-Aemon) Alliser-Thorne Undirected 4 1.0
In [3]:
# Function to visualize community
def visualize_community(Graph, filter=None, filter_nodes=None):
    # Assign positions to nodes using a layout algorithm
    pos = nx.spring_layout(Graph)
    nx.set_node_attributes(Graph, pos, 'pos')

    # Filter nodes by degree if specified
    if filter == "Yes" and filter_nodes is not None:
        nodes_to_keep = [node for node, degree in dict(Graph.degree()).items() if degree >= filter_nodes]
        Graph = Graph.subgraph(nodes_to_keep).copy()

    # Perform community detection
    communities = greedy_modularity_communities(Graph)
    community_map = {}
    for i, community in enumerate(communities):
        for node in community:
            community_map[node] = i

    # Create a plotly figure
    fig = go.Figure()

    # Add edges to the plotly figure
    for edge in Graph.edges(data=True):
        x0, y0 = Graph.nodes[edge[0]]['pos']
        x1, y1 = Graph.nodes[edge[1]]['pos']
        fig.add_trace(go.Scatter(x=[x0, x1, None], y=[y0, y1, None],
                                 mode='lines',
                                 line=dict(width=0.5, color='#888'),
                                 hoverinfo='none'))

    # Add nodes to the plotly figure
    node_x = []
    node_y = []
    node_color = []
    node_text = []
    for node in Graph.nodes():
        x, y = Graph.nodes[node]['pos']
        node_x.append(x)
        node_y.append(y)
        node_color.append(community_map[node])
        node_text.append(node)

    fig.add_trace(go.Scatter(x=node_x, y=node_y,
                             mode='markers+text',
                             text=node_text,
                             textposition="top center",
                             marker=dict(size=10,
                                         color=node_color,
                                         colorscale='Viridis',
                                         colorbar=dict(title="Community"),
                                         line=dict(width=2)),
                             hoverinfo='text'))

    # Update layout
    fig.update_layout(showlegend=False,
                      hovermode='closest',
                      margin=dict(b=0, l=0, r=0, t=0),
                      xaxis=dict(showgrid=False, zeroline=False),
                      yaxis=dict(showgrid=False, zeroline=False))

    fig.show()

# Visualize the community with filtering
visualize_community(Graph=G, filter="Yes", filter_nodes=50)
In [ ]: